# -*- coding: utf-8 -*-
import copy
import random
import time
import requests
from traceback import format_exc
from pymongo import UpdateOne
from zc_core.client.mongo_client import Mongo
from zc_core.client.redis_client import Redis
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.util.done_filter import DoneFilter
from cmcc.rules import *


# Parse ItemData
def parse_item_data(sku_id, rs_text):
    """Build an ``ItemData`` record from a goods-detail HTML page.

    Args:
        sku_id: SKU identifier of the page; stored under ``skuId``.
        rs_text: HTML text of the goods-detail page.

    Returns:
        A populated ``ItemData``, or ``None`` when the page contains no
        ``div#_cart`` element (the page is then treated as off the shelf).
    """
    jpy = PyQuery(rs_text)

    # On-sale marker: without the cart element the product is off the shelf.
    if not jpy('div#_cart'):
        return None

    # Supplier SKU id: prefer the hidden input, fall back to the visible label.
    supplier_sku_id = jpy('input#kc_input_supSn').attr('value')
    if not supplier_sku_id:
        supplier_sku_txt = jpy('div.gtitle:contains("商品编号：") + div.gcon').text()
        supplier_sku_id = match_supplier_sku_id(supplier_sku_txt)
    nav = jpy('div.breadCrumbs a')
    # Material code text looks like:
    # 10192375 (电商平台\办公文具\办公常用工具及耗材\笔筒)
    material_code = jpy('div.gtitle:contains("物料编码：") + div.gcon').text().strip()
    sale_price = jpy('span#totalPrice').text()
    origin_price = jpy('div.gtitle:contains("市场价格：") + div.gcon span').text()
    supplier_name = jpy('div.gtitle:contains("供 应 商：") + div.gcon').text()
    supplier_id = jpy('input#kc_input_supId').attr('value')
    min_buy = jpy('input#kc_input_minBuy').attr('value')

    supplier_sku_link = jpy('span.monthSale_sp2 a:eq(1)')
    unit = jpy('span#danwei_01').text()
    images = jpy('div#spec-n1 img')

    result = ItemData()
    # -------------------------------------------------
    result['skuId'] = sku_id
    result['materialCode'] = match_material_code(material_code)
    # NOTE(review): still raises AttributeError when the label has no `title`
    # attribute; left as-is because the name looks mandatory -- confirm.
    result['skuName'] = jpy('div.breadCrumbs label').attr('title').strip()
    if images:
        # `.attr` returns None for a missing attribute; the image is optional,
        # so skip it instead of failing the whole item.
        img_src = images.eq(0).attr('src')
        if img_src is not None:
            result['skuImg'] = img_src.strip()
    # Breadcrumb links 1..3 carry the three catalog levels.
    result['catalog1Id'] = match_cat_id(nav.eq(1).attr('href'))
    result['catalog1Name'] = nav.eq(1).text().strip()
    result['catalog2Id'] = match_cat_id(nav.eq(2).attr('href'))
    result['catalog2Name'] = nav.eq(2).text().strip()
    result['catalog3Id'] = match_cat_id(nav.eq(3).attr('href'))
    result['catalog3Name'] = nav.eq(3).text().strip()
    result['salePrice'] = parse_number(sale_price)
    result['originPrice'] = parse_number(origin_price)
    result['unit'] = unit
    # The fifth breadcrumb link, when present, is the brand.
    brand = nav.eq(4)
    if brand:
        result['brandId'] = match_brand_id(brand.attr('href'))
        result['brandName'] = brand.text().strip()
    if supplier_name and not supplier_id:
        # Fall back to the configured name -> id mapping; settings are only
        # loaded on this path because nothing else in the function uses them.
        suppliers = get_project_settings().getdict('SUPPLIERS', {})
        supplier_id = suppliers.get(supplier_name, None)
    if supplier_id:
        result['supplierId'] = supplier_id.strip()
    if supplier_name:
        result['supplierName'] = supplier_name.strip()
    if supplier_sku_id:
        result['supplierSkuId'] = supplier_sku_id.strip()
        plat_code = None
        # This supplier id / name marks the "deli" platform.
        if supplier_id == '25a547ae759911e6aabc0894ef108494' or '得力' in supplier_name:
            plat_code = 'deli'
        result['supplierSkuCode'] = convert_id2code(plat_code, supplier_sku_id)
    if supplier_sku_link:
        # Same None-guard as for the image: an anchor without href is skipped.
        link_href = supplier_sku_link.attr('href')
        if link_href is not None:
            result['supplierSkuLink'] = link_href.strip()
    if min_buy:
        result['minBuy'] = min_buy
    result['genTime'] = datetime.utcnow()
    # -------------------------------------------------

    return result


# Parse the order (sale record) list
def parse_order_item(sku_id, rs_text):
    """Extract sale-record rows from a goods-detail HTML page.

    Args:
        sku_id: SKU identifier stored on every produced order.
        rs_text: HTML text of the goods-detail page.

    Returns:
        A tuple ``(orders, need_next_page)``. ``orders`` is the list of
        ``OrderItem`` objects accepted by ``order_filter.to_save``.
        ``need_next_page`` is ``False`` when the table has no rows or when at
        least one row was rejected by the filter, otherwise ``True``.
    """
    jpy = PyQuery(rs_text)
    supplier_name = jpy('div.gtitle:contains("供 应 商：") + div.gcon').text().strip()

    orders = list()
    tr_list = jpy('div.dataTable tbody tr')
    if not tr_list:
        logger.info('无单[%s]', sku_id)
        return orders, False

    need_next_page = True
    prev_order = None
    same_order_no = 1
    for tr in tr_list.items():
        tds = tr('td')
        # A single-cell row is the placeholder:
        # <td colspan="4">该商品近30天无销售记录</td>
        if len(tds) <= 1:
            continue
        # Purchase time, e.g. 2018-12-14 11:18
        order_time_str = tds.eq(3).text()
        order_time = parse_time(order_time_str, fmt='%Y-%m-%d %H:%M')
        if not order_filter.to_save(order_time):
            need_next_page = False
            continue

        order = OrderItem()
        order['skuId'] = sku_id
        amount = match_amount(tds.eq(2).text().strip())
        order['count'] = round(parse_number(amount))
        order['amount'] = parse_number(tds.eq(1).text().strip())
        # User cell, e.g. l******@zj.cmcc<br />浙江公司
        # NOTE(review): depending on the serializer the break may be emitted
        # as `<br/>`; confirm the separator against real responses.
        user = (tds.eq(0).html() or '').split('<br />')
        order['orderUser'] = user[0]
        # Some orders have no purchasing department; the cell may then have no
        # second part at all, so do not index blindly.
        order_dept = user[1] if len(user) > 1 else ''
        order['orderDept'] = order_dept if order_dept else '未知'
        order['supplierName'] = supplier_name
        order['orderTime'] = order_time
        order['batchNo'] = time_to_batch_no(order_time)
        order['genTime'] = datetime.utcnow()
        # Consecutive rows that compare equal get an increasing sequence
        # number so each one hashes to a distinct order id.
        if prev_order and prev_order.equals(order):
            same_order_no = same_order_no + 1
        else:
            same_order_no = 1
        addition = {
            'sameOrderNo': same_order_no,
            'orderTimeStr': order_time_str,
        }
        sha1_id = build_sha1_order_id(order, addition)
        order['id'] = sha1_id
        order['orderId'] = sha1_id
        orders.append(order)
        prev_order = order

    return orders, need_next_page


def crawl(sku_id, order_page=1, timeout=30):
    """Fetch the goods-detail page of a SKU and parse item and order data.

    Args:
        sku_id: SKU identifier, sent as the ``sn`` query parameter.
        order_page: sale-record page number. Currently unused: only the
            commented-out sale-record URL below takes it.
        timeout: seconds passed to ``requests.get``. Without a timeout a
            stalled proxy or server would block the crawl indefinitely.

    Returns:
        A tuple ``(item_data, order_list, need_next_page)``. All three are
        ``None`` when the response is empty or ``'EOF'`` (anti-crawl), or when
        any exception occurs while fetching or parsing. The tuple shape is
        returned on every path so callers can always unpack it.
    """
    try:
        # NOTE(review): proxy and session cookie are hard-coded; they expire
        # and are credentials, so they should come from configuration.
        conf = {'proxy': '58.210.211.128:4236',
                'cookie': 'JSESSIONID=601d5d3f9ef81a1a189c7fd3c86f; centralSessionId=_userId-c1db073ffef143c2822e8dfb024c2192_abf4e74e3e29461bb20bac503029c1b7; gr-web-oscp=44799.6486.194.8155.0000; gr-web3-8080=44799.6746.194.8054.0000; gr-web-8080=44799.6483.194.8051.0000; JSESSIONID=6020c4dd6a8f0eabb23b87771f76; WT_FPC=id=2ebc809a29820aa7c971593533667513:lv=1593533667513:ss=1593533667513'}

        url = 'http://b2bjoy.10086.cn/oscp/goods/product/goodsDetail.html?sn={}'.format(sku_id)
        # Paged sale-record variant (would consume order_page):
        # url = 'http://b2bjoy.10086.cn/oscp/goods/product/goodsDetail.do?saleRecordFlag=1&sn={}&salePeriod=0&pageNumber_saleRecord={}&pageSize_saleRecord=5'.format(sku_id, order_page)
        headers = {
            'Host': 'b2bjoy.10086.cn',
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3756.400 QQBrowser/10.5.4039.400',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'http://b2bjoy.10086.cn/oscp/home/main.html',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cookie': conf.get('cookie'),
        }
        proxies = {
            'http': 'http://{}'.format(conf.get('proxy')),
            'https': 'http://{}'.format(conf.get('proxy')),
        }
        rs = requests.get(
            url=url,
            headers=headers,
            proxies=proxies,
            timeout=timeout,
        )
        # An empty body or a literal 'EOF' is treated as an anti-crawl reply.
        if not rs.text or 'EOF' == rs.text:
            print('反爬：%s' % sku_id)
            print(rs.text)
            return None, None, None
        item_data = parse_item_data(sku_id, rs.text)
        order_list, need_next_page = parse_order_item(sku_id, rs.text)
        return item_data, order_list, need_next_page
    except Exception:
        # Best-effort boundary: report the failure and keep the return shape
        # so one bad SKU does not abort the caller's loop.
        print(format_exc())
        return None, None, None


def save_data(item, mongo):
    """Persist a parsed product item or a list of order rows to MongoDB.

    Args:
        item: either an ``ItemData`` (product) or a ``list`` of order
            mappings. Order mappings need ``id`` and ``orderTime`` keys.
        mongo: object exposing ``db[collection_name]`` PyMongo collections.

    Returns:
        The ``item`` that was passed in when it was saved. ``None`` when an
        ``ItemData`` fails ``validate()`` or when ``item`` is of another type.

    Side effects:
        Products are upserted into ``data_<batchNo>`` and ``item_data_pool``,
        and the SKU id is added to the Redis set ``rds_filter_key``. Orders
        are bulk-upserted into ``order_item_<yyyymm>`` collections. Each order
        missing ``deptId`` gets one derived from ``orderDept``, in place.
    """
    # Product
    if isinstance(item, ItemData):
        if not item.validate():
            return None
        batch_no = item.get('batchNo')
        # Mongo uses the SKU id as primary key; it is swapped back below.
        sku_id = item.pop("skuId")
        item['_id'] = sku_id
        group_list = item.pop("groupList") if "groupList" in item else None
        batch_coll = mongo.db['data_{}'.format(batch_no)]
        pool_coll = mongo.db['item_data_pool']
        # Save the product. update_one replaces the legacy Collection.update,
        # which is deprecated in PyMongo 3 and removed in PyMongo 4.
        batch_coll.update_one({'_id': sku_id}, {'$set': item}, upsert=True)
        pool_coll.update_one({'_id': sku_id}, {'$set': item}, upsert=True)
        # Propagate the shared spuId to every SKU of the same group.
        if group_list:
            group_list.append(sku_id)
            spu_update = {'$set': {'spuId': item.get('spuId')}}
            batch_coll.update_many({'_id': {'$in': group_list}}, spu_update, upsert=False)
            pool_coll.update_many({'_id': {'$in': group_list}}, spu_update, upsert=False)
        item['skuId'] = item.pop("_id")
        # Mark the SKU as done in the cache. The id is taken from the item
        # itself rather than from a global that only exists in script mode.
        rds.client.sadd(rds_filter_key, sku_id)

        return item
    # Order rows
    elif isinstance(item, list):
        year_month_bulk_map = dict()
        for data in item:
            # Fill in the purchasing-department id when missing.
            order_dept = data.get('orderDept')
            if order_dept and not data.get('deptId'):
                data['deptId'] = md5(order_dept)
            # Derive the batch number from the order time.
            batch_no = time_to_batch_no(data.get('orderTime'))
            if not batch_no:
                continue
            # 20190710 -> 201907: orders are sharded by year-month.
            year_month = str(batch_no)[:-2]
            # Copy so that popping "id" does not alter the caller's row.
            to_save = copy.deepcopy(data)
            year_month_bulk_map.setdefault(year_month, []).append(
                UpdateOne({'_id': to_save.pop("id")}, {'$set': to_save}, upsert=True))
        for year_month, bulk_list in year_month_bulk_map.items():
            mongo.db['order_item_{}'.format(year_month)].bulk_write(bulk_list, ordered=False,
                                                                    bypass_document_validation=True)
        return item


# Module-level shared state. These statements run on import, not only when the
# file is executed as a script.
# Redis wrapper; its `.client` is used for the "already crawled" set
# (sadd / sismember).
rds = Redis()
# Mongo wrapper; collections are reached through `mongo.db[name]`.
mongo = Mongo()
# Batch identifier of this run (looks like a YYYYMMDD date -- confirm). It is
# stamped on every item as `batchNo` and is part of the Redis key below.
batch_no = '20200630'
# Redis set holding the SKU ids already saved for this batch.
rds_filter_key = 'zc:work:cmcc:full:{}'.format(batch_no)

if __name__ == '__main__':
    # Full crawl of the SKU pool for `batch_no`: skip SKUs that have been
    # offline too long or were already collected, then fetch and store the
    # product and its sale records.
    settings = get_project_settings()
    mongo_filter = DoneFilter(batch_no)
    # Loop-invariant settings, read once instead of once per SKU.
    max_offline_time = settings.get('MAX_OFFLINE_TIME', 2)
    force_recover = settings.get('FORCE_RECOVER', False)

    pool_list = SkuPoolDao().get_sku_pool_list(fields={'_id': 1, 'offlineTime': 1})
    print('全量：%s' % (len(pool_list)))
    random.shuffle(pool_list)
    # pool_list.reverse()
    for idx, sku in enumerate(pool_list):
        sku_id = sku.get('_id')
        offline_time = sku.get('offlineTime', 0)
        if offline_time > max_offline_time:
            print('忽略: [%s][%s]' % (sku_id, offline_time))
            continue
        # Avoid re-crawling: already done according to the Mongo-backed filter
        # (unless a forced recover is configured).
        if mongo_filter.contains(sku_id) and not force_recover:
            print('已采1：%s' % sku_id)
            continue
        # Avoid re-crawling: already recorded in the Redis set of this batch.
        if rds.client.sismember(rds_filter_key, sku_id):
            print('已采2：%s' % sku_id)
            continue

        # Defensive: a failed fetch must not abort the whole run, so a
        # non-tuple (None) result is treated as "nothing parsed".
        item_data, order_list, need_next_page = crawl(sku_id) or (None, None, None)
        if item_data:
            item_data['batchNo'] = batch_no
            save_data(item_data, mongo)
            print('%s、商品: %s' % (idx, sku_id))
        else:
            print('%s、下架: %s' % (idx, sku_id))
        if order_list:
            save_data(order_list, mongo)
            print('%s、订单: sku=%s, cnt=%s' % (idx, sku_id, len(order_list)))
            if need_next_page:
                print('%s、订单下页: sku=%s, next=%s' % (idx, sku_id, need_next_page))
        else:
            print('%s、无单: sku=%s' % (idx, sku_id))

        # Small pause between requests.
        time.sleep(0.01)
